This notebook analyzes gene expression in microglia subclusters across multiple brain regions (SN, PUT, PFC, AMY) in the ASAP-PMDBS snRNA-seq dataset. The workflow focuses on: 1. Data preparation: loading gene-level counts from trusTEr output, organizing by cluster and region. 2. DEA per cluster: identifying marker genes that characterize each microglia cluster (to better characterize them). 3. PD vs Control analysis: testing differential expression between PD and control within each cluster and region. 4. Functional focus: visualizing interferon (IFN)-related signatures across clusters. 5. Visualization: generating mean-difference plots, violin/boxplots, and UMAP overlays for selected genes (e.g., CIITA, IFNAR1, TLR4, LRRK2).
Load all required libraries and custom DEA functions
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:SummarizedExperiment':
##
## shift
## The following object is masked from 'package:GenomicRanges':
##
## shift
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
library(ggpubr)
## Loading required package: ggplot2
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first(), S4Vectors::first()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks data.table::second(), S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice() masks IRanges::slice()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openxlsx)
source("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/processing/TE_DEA_functions.R")
samplesheet <- read.xlsx("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/ASAP_samplesheet.xlsx")
samplesheet <- samplesheet[!is.na(samplesheet$Sample),]
Gather featureCount outputs, extract region and cluster IDs, merge
them into cluster-specific count matrices and save them for convenience
as .csv
path <- "~/inbox/ASAP/truster_output_microglia"
files <- list.files(path, recursive = T, full.names = T)
files <- files[which(grepl("gene_counts/unique/", files))]
files <- files[which(!endsWith(files, "summary"))]
files <- data.frame(files = files)
files$region <- sapply(str_split(files$files, "/"), `[[`, 7)
files$cluster <- sapply(str_split(files$files, "_uniqueMap"), `[[`, 1)
files$cluster <- sapply(str_split(files$cluster, "/"), tail, 1)
files$cluster_num <- sapply(str_split(files$cluster, "_"), tail, 1)
files$region_cluster <- paste(files$region, files$cluster_num, sep="_")
files_split <- split(files, f = c(files$region_cluster))
regions <- c("SN", "PUT", "PFC", "AMY")
for(region in regions){
for(cluster in as.character(0:8)){
print(paste0(region, " cluster: ", cluster))
region_cluster <- paste(region, cluster, sep="_")
files_region_cluster <- files_split[[region_cluster]]
print(length(files_region_cluster$files))
for(f in 1:length(files_region_cluster$files)){
file <- files_region_cluster$files[f]
if(f == 1 & region == regions[1] & cluster == "0"){
gene_counts <- fread(file, data.table = F)
colnames(gene_counts)[ncol(gene_counts)] <- str_remove_all(sapply(str_split(colnames(gene_counts)[ncol(gene_counts)], "/"), tail, 1), "_Aligned.sortedByCoord.out.bam")
}else{
tmp <- fread(file, data.table = F)
colnames(tmp)[ncol(tmp)] <- str_remove_all(sapply(str_split(colnames(tmp)[ncol(tmp)], "/"), tail, 1), "_Aligned.sortedByCoord.out.bam")
if(all(tmp$Geneid == gene_counts$Geneid)){
gene_counts <- cbind(gene_counts, tmp[,ncol(tmp),drop=F])
}else{
gene_counts <- merge(gene_counts, tmp[,c("Geneid", colnames(tmp)[ncol(tmp)])], by="Geneid")
}
}
}
clusters_region_all <- str_remove(files_split[[region_cluster]]$cluster, paste0(region, "_"))
columns_keep <- clusters_region_all[which(clusters_region_all %in% colnames(gene_counts))]
gene_counts <- gene_counts[,c(colnames(gene_counts)[1:6], columns_keep)]
write.csv(gene_counts, paste("~/inbox/ASAP/truster_output_microglia/", region, "/gene_counts_", paste0(region, "_", cluster), ".csv", sep=""), row.names = F)
}
}
## [1] "SN cluster: 0"
## [1] 28
## [1] "SN cluster: 1"
## [1] 27
## [1] "SN cluster: 2"
## [1] 28
## [1] "SN cluster: 3"
## [1] 30
## [1] "SN cluster: 4"
## [1] 31
## [1] "SN cluster: 5"
## [1] 31
## [1] "SN cluster: 6"
## [1] 30
## [1] "SN cluster: 7"
## [1] 31
## [1] "SN cluster: 8"
## [1] 32
## [1] "PUT cluster: 0"
## [1] 30
## [1] "PUT cluster: 1"
## [1] 25
## [1] "PUT cluster: 2"
## [1] 29
## [1] "PUT cluster: 3"
## [1] 33
## [1] "PUT cluster: 4"
## [1] 29
## [1] "PUT cluster: 5"
## [1] 30
## [1] "PUT cluster: 6"
## [1] 32
## [1] "PUT cluster: 7"
## [1] 33
## [1] "PUT cluster: 8"
## [1] 33
## [1] "PFC cluster: 0"
## [1] 32
## [1] "PFC cluster: 1"
## [1] 27
## [1] "PFC cluster: 2"
## [1] 25
## [1] "PFC cluster: 3"
## [1] 35
## [1] "PFC cluster: 4"
## [1] 28
## [1] "PFC cluster: 5"
## [1] 37
## [1] "PFC cluster: 6"
## [1] 37
## [1] "PFC cluster: 7"
## [1] 37
## [1] "PFC cluster: 8"
## [1] 39
## [1] "AMY cluster: 0"
## [1] 27
## [1] "AMY cluster: 1"
## [1] 20
## [1] "AMY cluster: 2"
## [1] 26
## [1] "AMY cluster: 3"
## [1] 30
## [1] "AMY cluster: 4"
## [1] 23
## [1] "AMY cluster: 5"
## [1] 26
## [1] "AMY cluster: 6"
## [1] 30
## [1] "AMY cluster: 7"
## [1] 31
## [1] "AMY cluster: 8"
## [1] 30
Characterize cluster identity within each microglia subcluster. Run DEA comparing each subcluster to “other” subclusters and save results.
gene_counts <- list()
for(region in regions){
for(cluster in as.character(0:8)){
print(cluster)
print(region)
if(cluster == "0"){
gene_counts[[region]] <- fread(paste("~/inbox/ASAP/truster_output_microglia/", region, "/gene_counts_", region,"_", cluster,".csv", sep=""), sep = ",", data.table = F)
samplesheet_clusters <- samplesheet
samplesheet_clusters$cluster <- cluster
samplesheet_clusters$samples_cluster <- paste(samplesheet_clusters$Sample, cluster, sep="_")
}else{
tmp <- fread(paste("~/inbox/ASAP/truster_output_microglia/", region, "/gene_counts_", region,"_", cluster,".csv", sep=""), sep = ",", data.table = F)
if(all(tmp$Geneid == gene_counts[[region]]$Geneid)){
gene_counts[[region]] <- cbind(gene_counts[[region]], tmp[,c(7:ncol(tmp))])
}else{
gene_counts[[region]] <- merge(gene_counts[[region]], tmp[,c(1, 7:ncol(tmp))], by="Geneid")
}
samplesheet_tmp <- samplesheet
samplesheet_tmp$samples_cluster <- paste(samplesheet_tmp$Sample, cluster, sep="_")
samplesheet_tmp$cluster <- cluster
samplesheet_clusters <- rbind(samplesheet_clusters, samplesheet_tmp)
}
rownames(gene_counts[[region]]) <- gene_counts[[region]]$Geneid
}
}
## [1] "0"
## [1] "SN"
## [1] "1"
## [1] "SN"
## [1] "2"
## [1] "SN"
## [1] "3"
## [1] "SN"
## [1] "4"
## [1] "SN"
## [1] "5"
## [1] "SN"
## [1] "6"
## [1] "SN"
## [1] "7"
## [1] "SN"
## [1] "8"
## [1] "SN"
## [1] "0"
## [1] "PUT"
## [1] "1"
## [1] "PUT"
## [1] "2"
## [1] "PUT"
## [1] "3"
## [1] "PUT"
## [1] "4"
## [1] "PUT"
## [1] "5"
## [1] "PUT"
## [1] "6"
## [1] "PUT"
## [1] "7"
## [1] "PUT"
## [1] "8"
## [1] "PUT"
## [1] "0"
## [1] "PFC"
## [1] "1"
## [1] "PFC"
## [1] "2"
## [1] "PFC"
## [1] "3"
## [1] "PFC"
## [1] "4"
## [1] "PFC"
## [1] "5"
## [1] "PFC"
## [1] "6"
## [1] "PFC"
## [1] "7"
## [1] "PFC"
## [1] "8"
## [1] "PFC"
## [1] "0"
## [1] "AMY"
## [1] "1"
## [1] "AMY"
## [1] "2"
## [1] "AMY"
## [1] "3"
## [1] "AMY"
## [1] "4"
## [1] "AMY"
## [1] "5"
## [1] "AMY"
## [1] "6"
## [1] "AMY"
## [1] "7"
## [1] "AMY"
## [1] "8"
## [1] "AMY"
all(rownames(gene_counts$SN) == rownames(gene_counts$PUT))
## [1] TRUE
all(rownames(gene_counts$SN) == rownames(gene_counts$PFC))
## [1] TRUE
all(rownames(gene_counts$SN) == rownames(gene_counts$AMY))
## [1] TRUE
gene_counts_all <- cbind(gene_counts$SN,
gene_counts$PUT[,7:ncol(gene_counts$PUT)],
gene_counts$PFC[,7:ncol(gene_counts$PFC)],
gene_counts$AMY[,7:ncol(gene_counts$AMY)])
gene_cluster_dds_list <- list()
gene_cluster_exp_list <- list()
gene_cluster_res_list <- list()
gene_cluster_res_list_df <- list()
samplesheet <- read.xlsx("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/ASAP_samplesheet.xlsx")
samplesheet <- samplesheet[!is.na(samplesheet$Sample),]
for(cluster in as.character(0:8)){
gene_cluster_dds_list[[cluster]] <- list()
gene_cluster_exp_list[[cluster]] <- list()
gene_cluster_res_list[[cluster]] <- list()
gene_cluster_res_list_df[[cluster]] <- list()
samplesheet_clusters <- samplesheet_clusters[which(samplesheet_clusters$samples_cluster %in% colnames(gene_counts_all)),]
expressed_genes <- rownames(gene_counts_all)[which(rowSums(gene_counts_all[,samplesheet_clusters$samples_cluster]) > 0)]
rownames(samplesheet_clusters) <- samplesheet_clusters$samples_cluster
samplesheet_clusters$condition <- ifelse(samplesheet_clusters$cluster == cluster, cluster, "other")
gene_cluster_dds_list[[cluster]] <- DESeqDataSetFromMatrix((gene_counts_all[expressed_genes,samplesheet_clusters$samples_cluster]+1), samplesheet_clusters[samplesheet_clusters$samples_cluster,], design = ~ condition)
gene_cluster_dds_list[[cluster]]$condition <- relevel(gene_cluster_dds_list[[cluster]]$condition, "other")
gene_cluster_dds_list[[cluster]] <- DESeq(gene_cluster_dds_list[[cluster]])
gene_cluster_res_list[[cluster]] <- results(gene_cluster_dds_list[[cluster]], )
gene_cluster_res_list_df[[cluster]] <- as.data.frame(gene_cluster_res_list[[cluster]])
gene_cluster_res_list_df[[cluster]]$gene_id <- rownames(gene_cluster_res_list_df[[cluster]])
print(names(gene_cluster_res_list_df[[cluster]]))
openxlsx::write.xlsx(gene_cluster_res_list_df[[cluster]], paste("/Volumes/MyPassport/ASAP/data/ASAP_PMDBS_snRNAseq/results/tables/gene_microglia_subcluster_", cluster, "_cluster_DEA_snRNAseq_res.xlsx", sep=""))
}
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 168 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "baseMean" "log2FoldChange" "lfcSE" "stat"
## [5] "pvalue" "padj" "gene_id"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 161 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "baseMean" "log2FoldChange" "lfcSE" "stat"
## [5] "pvalue" "padj" "gene_id"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 230 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "baseMean" "log2FoldChange" "lfcSE" "stat"
## [5] "pvalue" "padj" "gene_id"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 235 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "baseMean" "log2FoldChange" "lfcSE" "stat"
## [5] "pvalue" "padj" "gene_id"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 919 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "baseMean" "log2FoldChange" "lfcSE" "stat"
## [5] "pvalue" "padj" "gene_id"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 215 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "baseMean" "log2FoldChange" "lfcSE" "stat"
## [5] "pvalue" "padj" "gene_id"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 177 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "baseMean" "log2FoldChange" "lfcSE" "stat"
## [5] "pvalue" "padj" "gene_id"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 157 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "baseMean" "log2FoldChange" "lfcSE" "stat"
## [5] "pvalue" "padj" "gene_id"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 169 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "baseMean" "log2FoldChange" "lfcSE" "stat"
## [5] "pvalue" "padj" "gene_id"
colnames(gene_counts$PUT)
## [1] "Geneid" "Chr"
## [3] "Start" "End"
## [5] "Strand" "Length"
## [7] "AA_ASAP100_ctrl_NP18-148_PUT_0" "AA_ASAP117_PD_NP22-55_PUT_0"
## [9] "AA_ASAP138_PD_NP16-285_PUT_0" "AA_ASAP139_PD_NP17-232_PUT_0"
## [11] "AA_ASAP140_PD_NP19-91_PUT_0" "AA_ASAP141_ctrl_NP19-218_PUT_0"
## [13] "AA_ASAP142_ctrl_NP22-37_PUT_0" "AA_ASAP144_PD_NP19-137_PUT_0"
## [15] "AA_ASAP146_PD_NP21-04_PUT_0" "AA_ASAP87_PD_NP21-208_PUT_0"
## [17] "AA_ASAP88_PD_NP21-217_PUT_0" "AA_ASAP93_ctrl_NP16-293_PUT_0"
## [19] "AA_ASAP94_ctrl_NP17-20_PUT_0" "AA_ASAP96_ctrl_NP18-46_PUT_0"
## [21] "AA_ASAP97_PD_NP21-57_PUT_0" "AA_ASAP98_ctrl_NP16-119_PUT_0"
## [23] "AA71_ASAP70_PD_NP18-304_PUT_0" "AA73_ASAP65_PD_NP16-269_PUT_0"
## [25] "AA74_ASAP62_PD_NP16-140_PUT_0" "AA76_ASAP74_PD_NP19-255_PUT_0"
## [27] "AA77_ASAP61_PD_NP16-25_PUT_0" "AA78_ASAP63_PD_NP16-160_PUT_0"
## [29] "AA79_ASAP64_PD_NP16-162_PUT_0" "AA81_ASAP67_PD_NP17-191_PUT_0"
## [31] "AA83_ASAP69_PD_NP18-287_PUT_0" "AA84_ASAP71_PD_NP19-16_PUT_0"
## [33] "AA85_ASAP72_PD_NP19-23_PUT_0" "AA87_ASAP76_ctrl_NP18-159_PUT_0"
## [35] "AA88_ASAP77_ctrl_NP19-36_PUT_0" "AA89_ASAP78_ctrl_NP19-37_PUT_0"
## [37] "AA_ASAP100_ctrl_NP18-148_PUT_1" "AA_ASAP117_PD_NP22-55_PUT_1"
## [39] "AA_ASAP138_PD_NP16-285_PUT_1" "AA_ASAP139_PD_NP17-232_PUT_1"
## [41] "AA_ASAP140_PD_NP19-91_PUT_1" "AA_ASAP141_ctrl_NP19-218_PUT_1"
## [43] "AA_ASAP144_PD_NP19-137_PUT_1" "AA_ASAP146_PD_NP21-04_PUT_1"
## [45] "AA_ASAP87_PD_NP21-208_PUT_1" "AA_ASAP88_PD_NP21-217_PUT_1"
## [47] "AA_ASAP93_ctrl_NP16-293_PUT_1" "AA_ASAP96_ctrl_NP18-46_PUT_1"
## [49] "AA_ASAP97_PD_NP21-57_PUT_1" "AA_ASAP98_ctrl_NP16-119_PUT_1"
## [51] "AA71_ASAP70_PD_NP18-304_PUT_1" "AA73_ASAP65_PD_NP16-269_PUT_1"
## [53] "AA74_ASAP62_PD_NP16-140_PUT_1" "AA76_ASAP74_PD_NP19-255_PUT_1"
## [55] "AA77_ASAP61_PD_NP16-25_PUT_1" "AA79_ASAP64_PD_NP16-162_PUT_1"
## [57] "AA81_ASAP67_PD_NP17-191_PUT_1" "AA83_ASAP69_PD_NP18-287_PUT_1"
## [59] "AA84_ASAP71_PD_NP19-16_PUT_1" "AA85_ASAP72_PD_NP19-23_PUT_1"
## [61] "AA87_ASAP76_ctrl_NP18-159_PUT_1" "AA_ASAP100_ctrl_NP18-148_PUT_2"
## [63] "AA_ASAP117_PD_NP22-55_PUT_2" "AA_ASAP138_PD_NP16-285_PUT_2"
## [65] "AA_ASAP139_PD_NP17-232_PUT_2" "AA_ASAP140_PD_NP19-91_PUT_2"
## [67] "AA_ASAP141_ctrl_NP19-218_PUT_2" "AA_ASAP142_ctrl_NP22-37_PUT_2"
## [69] "AA_ASAP144_PD_NP19-137_PUT_2" "AA_ASAP146_PD_NP21-04_PUT_2"
## [71] "AA_ASAP87_PD_NP21-208_PUT_2" "AA_ASAP88_PD_NP21-217_PUT_2"
## [73] "AA_ASAP92_ctrl_NP16-284_PUT_2" "AA_ASAP93_ctrl_NP16-293_PUT_2"
## [75] "AA_ASAP94_ctrl_NP17-20_PUT_2" "AA_ASAP97_PD_NP21-57_PUT_2"
## [77] "AA_ASAP98_ctrl_NP16-119_PUT_2" "AA71_ASAP70_PD_NP18-304_PUT_2"
## [79] "AA73_ASAP65_PD_NP16-269_PUT_2" "AA74_ASAP62_PD_NP16-140_PUT_2"
## [81] "AA76_ASAP74_PD_NP19-255_PUT_2" "AA77_ASAP61_PD_NP16-25_PUT_2"
## [83] "AA79_ASAP64_PD_NP16-162_PUT_2" "AA81_ASAP67_PD_NP17-191_PUT_2"
## [85] "AA82_ASAP68_PD_NP18-117_PUT_2" "AA83_ASAP69_PD_NP18-287_PUT_2"
## [87] "AA84_ASAP71_PD_NP19-16_PUT_2" "AA85_ASAP72_PD_NP19-23_PUT_2"
## [89] "AA87_ASAP76_ctrl_NP18-159_PUT_2" "AA89_ASAP78_ctrl_NP19-37_PUT_2"
## [91] "AA_ASAP100_ctrl_NP18-148_PUT_3" "AA_ASAP117_PD_NP22-55_PUT_3"
## [93] "AA_ASAP138_PD_NP16-285_PUT_3" "AA_ASAP139_PD_NP17-232_PUT_3"
## [95] "AA_ASAP140_PD_NP19-91_PUT_3" "AA_ASAP141_ctrl_NP19-218_PUT_3"
## [97] "AA_ASAP142_ctrl_NP22-37_PUT_3" "AA_ASAP144_PD_NP19-137_PUT_3"
## [99] "AA_ASAP146_PD_NP21-04_PUT_3" "AA_ASAP149_PD_NP23-21_PUT_3"
## [101] "AA_ASAP87_PD_NP21-208_PUT_3" "AA_ASAP88_PD_NP21-217_PUT_3"
## [103] "AA_ASAP92_ctrl_NP16-284_PUT_3" "AA_ASAP93_ctrl_NP16-293_PUT_3"
## [105] "AA_ASAP96_ctrl_NP18-46_PUT_3" "AA_ASAP97_PD_NP21-57_PUT_3"
## [107] "AA_ASAP98_ctrl_NP16-119_PUT_3" "AA71_ASAP70_PD_NP18-304_PUT_3"
## [109] "AA73_ASAP65_PD_NP16-269_PUT_3" "AA74_ASAP62_PD_NP16-140_PUT_3"
## [111] "AA76_ASAP74_PD_NP19-255_PUT_3" "AA77_ASAP61_PD_NP16-25_PUT_3"
## [113] "AA78_ASAP63_PD_NP16-160_PUT_3" "AA79_ASAP64_PD_NP16-162_PUT_3"
## [115] "AA81_ASAP67_PD_NP17-191_PUT_3" "AA82_ASAP68_PD_NP18-117_PUT_3"
## [117] "AA83_ASAP69_PD_NP18-287_PUT_3" "AA84_ASAP71_PD_NP19-16_PUT_3"
## [119] "AA85_ASAP72_PD_NP19-23_PUT_3" "AA86_ASAP73_PD_NP19-108_PUT_3"
## [121] "AA87_ASAP76_ctrl_NP18-159_PUT_3" "AA88_ASAP77_ctrl_NP19-36_PUT_3"
## [123] "AA89_ASAP78_ctrl_NP19-37_PUT_3" "AA_ASAP100_ctrl_NP18-148_PUT_4"
## [125] "AA_ASAP117_PD_NP22-55_PUT_4" "AA_ASAP138_PD_NP16-285_PUT_4"
## [127] "AA_ASAP139_PD_NP17-232_PUT_4" "AA_ASAP140_PD_NP19-91_PUT_4"
## [129] "AA_ASAP141_ctrl_NP19-218_PUT_4" "AA_ASAP142_ctrl_NP22-37_PUT_4"
## [131] "AA_ASAP144_PD_NP19-137_PUT_4" "AA_ASAP87_PD_NP21-208_PUT_4"
## [133] "AA_ASAP88_PD_NP21-217_PUT_4" "AA_ASAP93_ctrl_NP16-293_PUT_4"
## [135] "AA_ASAP94_ctrl_NP17-20_PUT_4" "AA_ASAP96_ctrl_NP18-46_PUT_4"
## [137] "AA_ASAP97_PD_NP21-57_PUT_4" "AA_ASAP98_ctrl_NP16-119_PUT_4"
## [139] "AA71_ASAP70_PD_NP18-304_PUT_4" "AA73_ASAP65_PD_NP16-269_PUT_4"
## [141] "AA74_ASAP62_PD_NP16-140_PUT_4" "AA76_ASAP74_PD_NP19-255_PUT_4"
## [143] "AA77_ASAP61_PD_NP16-25_PUT_4" "AA78_ASAP63_PD_NP16-160_PUT_4"
## [145] "AA79_ASAP64_PD_NP16-162_PUT_4" "AA81_ASAP67_PD_NP17-191_PUT_4"
## [147] "AA83_ASAP69_PD_NP18-287_PUT_4" "AA84_ASAP71_PD_NP19-16_PUT_4"
## [149] "AA85_ASAP72_PD_NP19-23_PUT_4" "AA86_ASAP73_PD_NP19-108_PUT_4"
## [151] "AA87_ASAP76_ctrl_NP18-159_PUT_4" "AA89_ASAP78_ctrl_NP19-37_PUT_4"
## [153] "AA_ASAP100_ctrl_NP18-148_PUT_5" "AA_ASAP117_PD_NP22-55_PUT_5"
## [155] "AA_ASAP138_PD_NP16-285_PUT_5" "AA_ASAP139_PD_NP17-232_PUT_5"
## [157] "AA_ASAP140_PD_NP19-91_PUT_5" "AA_ASAP141_ctrl_NP19-218_PUT_5"
## [159] "AA_ASAP144_PD_NP19-137_PUT_5" "AA_ASAP146_PD_NP21-04_PUT_5"
## [161] "AA_ASAP87_PD_NP21-208_PUT_5" "AA_ASAP88_PD_NP21-217_PUT_5"
## [163] "AA_ASAP93_ctrl_NP16-293_PUT_5" "AA_ASAP94_ctrl_NP17-20_PUT_5"
## [165] "AA_ASAP96_ctrl_NP18-46_PUT_5" "AA_ASAP97_PD_NP21-57_PUT_5"
## [167] "AA_ASAP98_ctrl_NP16-119_PUT_5" "AA71_ASAP70_PD_NP18-304_PUT_5"
## [169] "AA73_ASAP65_PD_NP16-269_PUT_5" "AA74_ASAP62_PD_NP16-140_PUT_5"
## [171] "AA76_ASAP74_PD_NP19-255_PUT_5" "AA77_ASAP61_PD_NP16-25_PUT_5"
## [173] "AA78_ASAP63_PD_NP16-160_PUT_5" "AA79_ASAP64_PD_NP16-162_PUT_5"
## [175] "AA81_ASAP67_PD_NP17-191_PUT_5" "AA82_ASAP68_PD_NP18-117_PUT_5"
## [177] "AA83_ASAP69_PD_NP18-287_PUT_5" "AA84_ASAP71_PD_NP19-16_PUT_5"
## [179] "AA85_ASAP72_PD_NP19-23_PUT_5" "AA86_ASAP73_PD_NP19-108_PUT_5"
## [181] "AA87_ASAP76_ctrl_NP18-159_PUT_5" "AA89_ASAP78_ctrl_NP19-37_PUT_5"
## [183] "AA_ASAP100_ctrl_NP18-148_PUT_6" "AA_ASAP117_PD_NP22-55_PUT_6"
## [185] "AA_ASAP138_PD_NP16-285_PUT_6" "AA_ASAP139_PD_NP17-232_PUT_6"
## [187] "AA_ASAP140_PD_NP19-91_PUT_6" "AA_ASAP141_ctrl_NP19-218_PUT_6"
## [189] "AA_ASAP144_PD_NP19-137_PUT_6" "AA_ASAP146_PD_NP21-04_PUT_6"
## [191] "AA_ASAP87_PD_NP21-208_PUT_6" "AA_ASAP88_PD_NP21-217_PUT_6"
## [193] "AA_ASAP92_ctrl_NP16-284_PUT_6" "AA_ASAP93_ctrl_NP16-293_PUT_6"
## [195] "AA_ASAP94_ctrl_NP17-20_PUT_6" "AA_ASAP96_ctrl_NP18-46_PUT_6"
## [197] "AA_ASAP97_PD_NP21-57_PUT_6" "AA_ASAP98_ctrl_NP16-119_PUT_6"
## [199] "AA71_ASAP70_PD_NP18-304_PUT_6" "AA73_ASAP65_PD_NP16-269_PUT_6"
## [201] "AA74_ASAP62_PD_NP16-140_PUT_6" "AA76_ASAP74_PD_NP19-255_PUT_6"
## [203] "AA77_ASAP61_PD_NP16-25_PUT_6" "AA78_ASAP63_PD_NP16-160_PUT_6"
## [205] "AA79_ASAP64_PD_NP16-162_PUT_6" "AA81_ASAP67_PD_NP17-191_PUT_6"
## [207] "AA82_ASAP68_PD_NP18-117_PUT_6" "AA83_ASAP69_PD_NP18-287_PUT_6"
## [209] "AA84_ASAP71_PD_NP19-16_PUT_6" "AA85_ASAP72_PD_NP19-23_PUT_6"
## [211] "AA86_ASAP73_PD_NP19-108_PUT_6" "AA87_ASAP76_ctrl_NP18-159_PUT_6"
## [213] "AA88_ASAP77_ctrl_NP19-36_PUT_6" "AA89_ASAP78_ctrl_NP19-37_PUT_6"
## [215] "AA_ASAP100_ctrl_NP18-148_PUT_7" "AA_ASAP117_PD_NP22-55_PUT_7"
## [217] "AA_ASAP138_PD_NP16-285_PUT_7" "AA_ASAP139_PD_NP17-232_PUT_7"
## [219] "AA_ASAP140_PD_NP19-91_PUT_7" "AA_ASAP141_ctrl_NP19-218_PUT_7"
## [221] "AA_ASAP144_PD_NP19-137_PUT_7" "AA_ASAP146_PD_NP21-04_PUT_7"
## [223] "AA_ASAP149_PD_NP23-21_PUT_7" "AA_ASAP87_PD_NP21-208_PUT_7"
## [225] "AA_ASAP88_PD_NP21-217_PUT_7" "AA_ASAP92_ctrl_NP16-284_PUT_7"
## [227] "AA_ASAP93_ctrl_NP16-293_PUT_7" "AA_ASAP94_ctrl_NP17-20_PUT_7"
## [229] "AA_ASAP96_ctrl_NP18-46_PUT_7" "AA_ASAP97_PD_NP21-57_PUT_7"
## [231] "AA_ASAP98_ctrl_NP16-119_PUT_7" "AA71_ASAP70_PD_NP18-304_PUT_7"
## [233] "AA73_ASAP65_PD_NP16-269_PUT_7" "AA74_ASAP62_PD_NP16-140_PUT_7"
## [235] "AA76_ASAP74_PD_NP19-255_PUT_7" "AA77_ASAP61_PD_NP16-25_PUT_7"
## [237] "AA78_ASAP63_PD_NP16-160_PUT_7" "AA79_ASAP64_PD_NP16-162_PUT_7"
## [239] "AA81_ASAP67_PD_NP17-191_PUT_7" "AA82_ASAP68_PD_NP18-117_PUT_7"
## [241] "AA83_ASAP69_PD_NP18-287_PUT_7" "AA84_ASAP71_PD_NP19-16_PUT_7"
## [243] "AA85_ASAP72_PD_NP19-23_PUT_7" "AA86_ASAP73_PD_NP19-108_PUT_7"
## [245] "AA87_ASAP76_ctrl_NP18-159_PUT_7" "AA88_ASAP77_ctrl_NP19-36_PUT_7"
## [247] "AA89_ASAP78_ctrl_NP19-37_PUT_7" "AA_ASAP100_ctrl_NP18-148_PUT_8"
## [249] "AA_ASAP117_PD_NP22-55_PUT_8" "AA_ASAP138_PD_NP16-285_PUT_8"
## [251] "AA_ASAP139_PD_NP17-232_PUT_8" "AA_ASAP140_PD_NP19-91_PUT_8"
## [253] "AA_ASAP141_ctrl_NP19-218_PUT_8" "AA_ASAP144_PD_NP19-137_PUT_8"
## [255] "AA_ASAP146_PD_NP21-04_PUT_8" "AA_ASAP149_PD_NP23-21_PUT_8"
## [257] "AA_ASAP87_PD_NP21-208_PUT_8" "AA_ASAP88_PD_NP21-217_PUT_8"
## [259] "AA_ASAP92_ctrl_NP16-284_PUT_8" "AA_ASAP93_ctrl_NP16-293_PUT_8"
## [261] "AA_ASAP94_ctrl_NP17-20_PUT_8" "AA_ASAP96_ctrl_NP18-46_PUT_8"
## [263] "AA_ASAP97_PD_NP21-57_PUT_8" "AA_ASAP98_ctrl_NP16-119_PUT_8"
## [265] "AA71_ASAP70_PD_NP18-304_PUT_8" "AA73_ASAP65_PD_NP16-269_PUT_8"
## [267] "AA74_ASAP62_PD_NP16-140_PUT_8" "AA76_ASAP74_PD_NP19-255_PUT_8"
## [269] "AA77_ASAP61_PD_NP16-25_PUT_8" "AA78_ASAP63_PD_NP16-160_PUT_8"
## [271] "AA79_ASAP64_PD_NP16-162_PUT_8" "AA81_ASAP67_PD_NP17-191_PUT_8"
## [273] "AA82_ASAP68_PD_NP18-117_PUT_8" "AA83_ASAP69_PD_NP18-287_PUT_8"
## [275] "AA84_ASAP71_PD_NP19-16_PUT_8" "AA85_ASAP72_PD_NP19-23_PUT_8"
## [277] "AA86_ASAP73_PD_NP19-108_PUT_8" "AA87_ASAP76_ctrl_NP18-159_PUT_8"
## [279] "AA88_ASAP77_ctrl_NP19-36_PUT_8" "AA89_ASAP78_ctrl_NP19-37_PUT_8"
Differential expression analyses for PD vs Control pseudobulks for each region and subcluster pair. Save per-subcluster results (one excel with region per sheet).
library(openxlsx)
gene_dds_list <- list()
gene_exp_list <- list()
gene_res_list <- list()
gene_res_list_df <- list()
samplesheet_list <- split(samplesheet, f = samplesheet$Region)
for(cluster in as.character(0:8)){
gene_dds_list[[cluster]] <- list()
gene_exp_list[[cluster]] <- list()
gene_res_list[[cluster]] <- list()
gene_res_list_df[[cluster]] <- list()
for(region in regions){
print(cluster)
print(region)
rownames(samplesheet_list[[region]]) <- samplesheet_list[[region]]$Sample
samples_cluster <- paste(rownames(samplesheet_list[[region]]), cluster, sep="_")
samples_cluster <- samples_cluster[which(samples_cluster %in% colnames(gene_counts[[region]]))]
rownames(samplesheet_list[[region]]) <- paste(rownames(samplesheet_list[[region]]), cluster, sep="_")
rownames(gene_counts[[region]]) <- gene_counts[[region]]$Geneid
expressed_genes <- rownames(gene_counts[[region]])[which(rowSums(gene_counts[[region]][,samples_cluster]) > 0)]
gene_dds_list[[cluster]][[region]] <- DESeqDataSetFromMatrix((gene_counts[[region]][expressed_genes,samples_cluster]+1), samplesheet_list[[region]][samples_cluster,], design = ~ Dx)
gene_dds_list[[cluster]][[region]]$Dx <- relevel(gene_dds_list[[cluster]][[region]]$Dx, "Ctl")
gene_dds_list[[cluster]][[region]] <- DESeq(gene_dds_list[[cluster]][[region]])
gene_exp_list[[cluster]][[region]] <- getAverage(gene_dds_list[[cluster]][[region]])
gene_res_list[[cluster]][[region]] <- results(gene_dds_list[[cluster]][[region]], )
gene_res_list_df[[cluster]][[region]] <- as.data.frame(gene_res_list[[cluster]][[region]])
gene_res_list_df[[cluster]][[region]]$gene_id <- rownames(gene_res_list_df[[cluster]][[region]])
print(names(gene_res_list_df[[cluster]]))
}
openxlsx::write.xlsx(gene_res_list_df[[cluster]], paste("/Volumes/MyPassport/ASAP/data/ASAP_PMDBS_snRNAseq/results/tables/gene_microglia_subcluster_", cluster, "_PD_DEA_snRNAseq_res.xlsx", sep=""))
}
## [1] "0"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 238 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "0"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 163 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "0"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 102 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC"
## [1] "0"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 132 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC" "AMY"
## [1] "1"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 297 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "1"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 114 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "1"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 193 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC"
## [1] "1"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 203 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC" "AMY"
## [1] "2"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 336 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "2"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 216 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "2"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 103 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC"
## [1] "2"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 189 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC" "AMY"
## [1] "3"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 241 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "3"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 137 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "3"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 57 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC"
## [1] "3"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 165 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC" "AMY"
## [1] "4"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 505 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "4"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 141 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "4"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 192 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC"
## [1] "4"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 351 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC" "AMY"
## [1] "5"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 210 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "5"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 70 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "5"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 97 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC"
## [1] "5"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 177 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC" "AMY"
## [1] "6"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 160 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "6"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 105 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "6"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 30 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC"
## [1] "6"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 78 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC" "AMY"
## [1] "7"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 126 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "7"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 91 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "7"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 28 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC"
## [1] "7"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 70 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC" "AMY"
## [1] "8"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 156 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "8"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 120 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "8"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 70 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC"
## [1] "8"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 79 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "PFC" "AMY"
Visualize mean expression vs log2FC for each subcluster-region
mean_plots_list <- list()
for(cluster in as.character(0:8)){
mean_plots_list[[cluster]] <- list()
for(region in regions){
effect <- unique(samplesheet_list[[region]][which(samplesheet_list[[region]]$Dx != "Ctl"),"Dx"])
colors_list <- pick_colors_meanplot(gene_res_list[[cluster]][[region]])
mean_plots_list[[cluster]][[region]] <- meanPlot_cus(exp = gene_exp_list[[cluster]][[region]]$Mean,
test = gene_res_list[[cluster]][[region]],
c1 = effect, c2="Ctl",
col1=colors_list$col1, col2=colors_list$col2, col3=colors_list$col3,
p = 0.05, l=1, ttl = paste0(region, "_", cluster), repel = F)
}
}
mean_plots_list
## $`0`
## $`0`$SN
## Warning: Removed 18932 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`0`$PUT
## Warning: Removed 19372 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`0`$PFC
## Warning: Removed 17092 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`0`$AMY
## Warning: Removed 19484 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`1`
## $`1`$SN
## Warning: Removed 19528 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`1`$PUT
## Warning: Removed 20317 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`1`$PFC
## Warning: Removed 17758 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`1`$AMY
## Warning: Removed 20614 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`2`
## $`2`$SN
## Warning: Removed 20211 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`2`$PUT
## Warning: Removed 20600 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`2`$PFC
## Warning: Removed 18105 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`2`$AMY
## Warning: Removed 18967 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`3`
## $`3`$SN
## Warning: Removed 17986 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`3`$PUT
## Warning: Removed 17547 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`3`$PFC
## Warning: Removed 16687 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`3`$AMY
## Warning: Removed 17849 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`4`
## $`4`$SN
## Warning: Removed 20001 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`4`$PUT
## Warning: Removed 17373 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`4`$PFC
## Warning: Removed 15598 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`4`$AMY
## Warning: Removed 16549 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`5`
## $`5`$SN
## Warning: Removed 16295 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`5`$PUT
## Warning: Removed 16944 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`5`$PFC
## Warning: Removed 14898 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`5`$AMY
## Warning: Removed 16388 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`6`
## $`6`$SN
## Warning: Removed 15552 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`6`$PUT
## Warning: Removed 15099 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`6`$PFC
## Warning: Removed 14138 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`6`$AMY
## Warning: Removed 15808 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`7`
## $`7`$SN
## Warning: Removed 15213 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`7`$PUT
## Warning: Removed 15345 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`7`$PFC
## Warning: Removed 14269 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`7`$AMY
## Warning: Removed 15627 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`8`
## $`8`$SN
## Warning: Removed 16513 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`8`$PUT
## Warning: Removed 16326 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`8`$PFC
## Warning: Removed 15455 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`8`$AMY
## Warning: Removed 16617 rows containing missing values or values outside the scale range
## (`geom_point()`).
Since these were the terms we saw up in microglia, let’s see if any subcluster in particular over-expresses these genes. Visualize across regions and clusters.
ifn_microglia_genes <- openxlsx::read.xlsx("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/results/tables_vs2/gene_analysis/IFN_related_genes_GO_microglia.xlsx")$gene_id
ifn_microglia_genes_res <- data.frame()
for(cluster in as.character(0:8)){
for(region in regions){
tmp <- gene_res_list_df[[cluster]][[region]][ifn_microglia_genes, ]
tmp$type <- paste(region, cluster, sep="_")
ifn_microglia_genes_res <- rbind(ifn_microglia_genes_res, tmp[,c("gene_id", "log2FoldChange", "type")])
}
}
ifn_microglia_genes_res$cluster <- sapply(str_split(ifn_microglia_genes_res$type, "_"), `[[`, 2)
ifn_microglia_genes_res$region <- sapply(str_split(ifn_microglia_genes_res$type, "_"), `[[`, 1)
ifn_microglia_genes_res$region <- factor(ifn_microglia_genes_res$region, levels=c("SN", "PUT", "PFC", "AMY"))
cluster_colors = c("0" = "#8ed1c6",
"1" = "#eec819",
"2" = "#bebada",
"3" = "#f48073",
"4" = "#80b2d3",
"5" = "#fbb363",
"6" = "#b4d66c",
"7" = "#f9cde1",
"8" = "#d9d9d8")
ifn_microglia_genes_res %>%
filter(region == "SN") %>%
ggplot(aes(x=cluster, y=log2FoldChange, fill=cluster)) + geom_jitter(color="lightgrey", width = 0.2, size=0.3, color="lightgrey") + geom_violin(alpha=0.5) + geom_boxplot(width = 0.2, outliers = F) +
theme_bw() + geom_hline(yintercept = 0, linetype="dashed") + scale_fill_manual(values= cluster_colors) + ggtitle("IFN core enrichment genes: SN Microglia")
## Warning: Duplicated aesthetics after name standardisation: colour
## Warning: Removed 68 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 68 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 68 rows containing missing values or values outside the scale range
## (`geom_point()`).
ifn_microglia_genes_res %>%
filter(region == "PUT") %>%
ggplot(aes(x=cluster, y=log2FoldChange, fill=cluster)) + geom_jitter(color="lightgrey", width = 0.2, size=0.3, color="lightgrey") + geom_violin(alpha=0.5) + geom_boxplot(width = 0.2, outliers = F) +
theme_bw() + geom_hline(yintercept = 0, linetype="dashed") + scale_fill_manual(values= cluster_colors) + ggtitle("IFN core enrichment genes: PUT Microglia")
## Warning: Duplicated aesthetics after name standardisation: colour
## Warning: Removed 64 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 64 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 64 rows containing missing values or values outside the scale range
## (`geom_point()`).
ifn_microglia_genes_res %>%
filter(region == "PFC") %>%
ggplot(aes(x=cluster, y=log2FoldChange, fill=cluster)) + geom_jitter(color="lightgrey", width = 0.2, size=0.3, color="lightgrey") + geom_violin(alpha=0.5) + geom_boxplot(width = 0.2, outliers = F) +
theme_bw() + geom_hline(yintercept = 0, linetype="dashed") + scale_fill_manual(values= cluster_colors) + ggtitle("IFN core enrichment genes: PFC Microglia")
## Warning: Duplicated aesthetics after name standardisation: colour
## Warning: Removed 110 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 110 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 110 rows containing missing values or values outside the scale range
## (`geom_point()`).
ifn_microglia_genes_res %>%
filter(region == "AMY") %>%
ggplot(aes(x=cluster, y=log2FoldChange, fill=cluster)) + geom_jitter(color="lightgrey", width = 0.2, size=0.3, color="lightgrey") + geom_violin(alpha=0.5) + geom_boxplot(width = 0.2, outliers = F) +
theme_bw() + geom_hline(yintercept = 0, linetype="dashed") + scale_fill_manual(values= cluster_colors) + ggtitle("IFN core enrichment genes: AMY Microglia")
## Warning: Duplicated aesthetics after name standardisation: colour
## Warning: Removed 83 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 83 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 83 rows containing missing values or values outside the scale range
## (`geom_point()`).
Overlay expression of key immune-related genes as examples.
microglia_gene_counts <- fread("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/results/tables_microglia/microglia_subcluster_genes_counts.csv", data.table=F)
microglia_gene_counts <- microglia_gene_counts[,c("V1", "CIITA", "IFNAR1", "TLR4", "LRRK2")]
microglia_umap <- fread("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/results/tables_microglia/microglia_subcluster_clustering_umap_barcodes.csv", data.table=F)
microglia_leiden <- fread("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/results/tables_microglia/microglia_subcluster_clustering_leiden_barcodes.csv", data.table=F)
microglia_gene_counts <- merge(merge(microglia_gene_counts, microglia_leiden, by="V1"), microglia_umap, by="V1")
microglia_gene_counts %>%
arrange(TLR4) %>%
ggplot(aes(x=V2, y=V3, color=TLR4)) +
geom_point(size=0.3) + facet_wrap(region~dx, ncol=2) + theme_bw() +
scale_color_distiller(palette = "Reds", direction = 1)
microglia_gene_counts %>%
arrange(IFNAR1) %>%
ggplot(aes(x=V2, y=V3, color=IFNAR1)) +
geom_point(size=0.3) + facet_wrap(region~dx, ncol=2) + theme_bw() +
scale_color_distiller(palette = "Reds", direction = 1)